/*NOTE: This is model code to illustrate the logic used to generate the datasets for analysis in the SCARP pilot.
This code is not intended to run as is - due to Epic restrictions on the sharing of intellectual property all 
of the table and attribute names have been "masked" with names enclosed in square brackets if they were pulled 
from Clarity (EPIC).*/

/*****************************************************
PROJECT:  BRIDGE C2 
PILOT: SCRAP
DATE: 7/22/2022

PROJECT DESCRIPTION: 
Use ASCEND approach to compare CRC, CVC and 
BC cancer prevention services in patients with
	No SDH Screening
	Documented Need
	Documented No Need
Stratified by SDH need: Food, Housing, Transportation
STUDY PERIOD: 7/1/2016-2/29/2020		


DETAILS (This Syntax): 
Pull independent variables for CRC_cohort

INPUT DATA: 
SCRAP.CRC_patients


OUTPUT DATA:
SCRAP.CRC_exclusions_LU
SCRAP.CRC_exclusions_detail
SCRAP.CRC_first_exclusion
scrap.CRC_patient_covariates


SECTIONS:
A. Pull exclusion dates and apply, update observation length;
B. Define SDH Screening status for food, housing and transportation insecurity
	No SDH Screening
	Documented Need
	Documented No Need
C. Pull demographics
	race_ethnicity: Hispanic, non-Hispanic Black, non-Hispanic white, Non-Hispanic other, no data
	Preferred language: English/non-English
	Age, primary payor at index
	FPL on or after Index
	Visits per year


*****************************************************/
options compress=yes reuse=yes;
libname SCRAP "e:\\sasroot\BRIDGE C2\SCRAP\";
libname RD odbc noprompt="dsn=research_dev;Trusted_Connection=yes" bulkload=yes;

/***************************************************************/


/***************************************************************/
/*A. Pull exclusion dates and apply, update observation length;*/
/***************************************************************/

options mprint symbolgen mlogic;
%let patients=scrap.SCRAP_CRC_PATIENTS;/*location of your denominator file*/
%let idname=raw_patid;/*name of the id variable that links to Clarity [patient identifier]*/
%let LU_output=scrap.CRC_exclusion_LU;/*output file of exclusion codes*/
%let end_date='29FEB2020'd;
%let crc_exclusion_detail=SCRAP.CRC_exclusions_detail;
%let first_CRC_exclude=SCRAP.CRC_First_exclusion;

%CRC_ex;

proc sql;
create table RD.jpo_crc_macro_patients as 
select * from &patients;


/*lu_output file created from internal code groupers*/

proc sql;
create table RD.jpo_CRC_macro_exclusion_codes as select * from &LU_output;
quit;

/*exclusion dates*/
/*from surgical history*/
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table exclude_shx as 
select * from connection to mycon (
     SELECT den.*
	,eap.[procedure name]	
  	,shx.[contact date]
	,shx.[SURGICAL HISTORY START DATE]
     ,shx.[SURGICAL HISTORY DATE]
     ,'Surgical Hx' AS Exclusion_Reason
    FROM research_dev.dbo.jpo_crc_macro_patients  AS den
     INNER JOIN clarity.dbo.[SURGICAL]        AS shx
      ON den.&idname = shx.[patient identifier]
     INNER JOIN research_dev.dbo.jpo_CRC_macro_exclusion_codes AS cpc
      ON shx.[procedure identifier] = cpc.grouper_records_numeric_id
	  left join clarity.dbo.[procedures records EAP] eap on shx.[procedure_identifier]=eap.[procedure_identifier]
	where cpc.compiled_context='EAP')
	;      
quit;


data exclude_shx (keep=raw_patid ADVANCE_patid [procedure name] exclude_date exclusion_reason);
set exclude_shx;
format exclude_date date9.;
exclude_date=datepart([surgical history start date]);
if exclude_date=. then exclude_date=datepart(contact_date);
if exclude_date>&end_date then delete;
run;

/*from medical history table*/
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table exclude_mhx as 
select * from connection to mycon (
     SELECT den.*
	,edg.[diagnosis name]	
  	,mhx.[contact date] 
	,mhx.[medical history start date]
     ,mhx.[MEDICAL History DATE]
     ,'Medical Hx' AS Exclusion_Reason
    FROM research_dev.dbo.jpo_crc_macro_patients  AS den
     INNER JOIN clarity.dbo.[medical]       AS mhx
      ON den.&idname = mhx.[patient identifier]
     INNER JOIN jpo_CRC_macro_exclusion_codes AS cpc
      ON mhx.[diagnosis identifier] = cpc.grouper_records_numeric_id
	  LEFT JOIN CLARITY.dbo.[diagnosis edg] edg
	  	on mhx.[diagnosis identifier]=edg.[diagnosis identifier]
		where cpc.compiled_context='EDG');      
quit;


data exclude_mhx (keep=raw_patid ADVANCE_patid [diagnosis name] exclude_date exclusion_reason);
set exclude_mhx;
format exclude_date date9.;
exclude_date=datepart([medical history start date]);
if exclude_date=. then exclude_date=datepart([contact date]);
if exclude_date>'30JUN2021'd then delete;
run;

/*from encounter diagnosis */
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table exclude_patenc_dx as 
select * from connection to mycon (
     SELECT den.*
	,edg.[diagnosis name]	
  	,pe.[contact date] 
     ,'Pat Enc Dx' AS Exclusion_Reason
    FROM research_dev.dbo.jpo_crc_macro_patients  AS den
     INNER JOIN clarity.dbo.[patient encounter]       AS pe
	 on den.&idname=pe.[patient identifier]
     INNER JOIN clarity.dbo.[patient encounter diagnosis]        AS pedx
      ON pe.[patient encounter contact serial number identifier] = pedx.[patient encounter contact serial number identifier]
     INNER JOIN research_dev.dbo.jpo_CRC_macro_exclusion_codes AS cpc
      ON pedx.[diagnosis identifier] = cpc.grouper_records_numeric_id
	  LEFT JOIN clarity.dbo.[diagnosis edg] edg
	  	on pedx.[diagnosis identifier]=edg.[diagnosis identifier]
		where cpc.compiled_context='EDG');      
quit;


data exclude_patenc_dx (keep=raw_patid ADVANCE_patid [diagnosis name] exclusion_reason exclude_date);
set exclude_patenc_dx;
format exclude_date date9.;
exclude_date=datepart([contact date]);
if exclude_date>&end_date then delete;
run;

/*from problem list*/
 proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table exclude_Plist as 
select * from connection to mycon (
     SELECT den.*
	,edg.[diagnosis name]	
  	,pl.[noted date]
	,pl.[date of entry]
     ,'Problem list dx' AS Exclusion_Reason
    FROM research_dev.dbo.jpo_crc_macro_patients  AS den
     INNER JOIN clarity.dbo.[problem list]        AS pl
      ON den.&idname = pl.[patient identifier]
     INNER JOIN research_dev.dbo.jpo_CRC_macro_exclusion_codes AS cpc
      ON pl.[diagnosis identifier] = cpc.grouper_records_numeric_id
	 INNER JOIN clarity.dbo.[diagnosis edg] edg  
		on pl.[diagnosis identifier]=edg.[diagnosis identifier]
		where cpc.compiled_context='EDG');      
quit; 

data exclude_plist (keep=raw_patid ADVANCE_patid [diagnosis name] exclude_date exclusion_reason);
set exclude_plist;
format exclude_date date9.;
exclude_date=datepart([noted date]);
if exclude_date=. then exclude_date=datepart([date of entry]);
if exclude_date>&end_date then delete;
run;

data &crc_exclusion_detail;
set exclude_mhx exclude_patenc_dx exclude_plist exclude_shx;
run;

proc sql;
create table &first_CRC_exclude as
select den.&idname, 
min(exclude_date) as exclude_date format date9.
from &crc_exclusion_detail den group by den.&idname;
quit;

proc datasets library=reschdev;
delete jpo_crc_macro_patients
jpo_CRC_macro_exclusion_codes;
quit;
%mend;


/*get last_encounter before exclude_date*/
/*(observation period runs from index visit to last encounter before exclusion)*/
proc sql;
create table last_enc as
select x.*,
max(e.contact_date) as last_encounter format date9.
from scrap.crc_first_exclusion x
left join scrap.scrap_potential_cohort_pc_enc e
on x.raw_patid=e.raw_patid
where e.contact_date<x.exclude_date
group by x.raw_patid,x.exclude_date ;

*merge exclusions to patients;
proc sql;
create table CRC_Pat as
select p.*,
x.exclude_date,
x.last_encounter as last_enc2,
d.sas_death_date format date9.
from SCRAP.SCRAP_CRC_patients p
left join last_enc x
on p.raw_patid=x.raw_patid
left join ochin.death d on p.raw_patid=d.raw_patid
where p.raw_patid^in(select distinct raw_patid from dropped_pat);

*truncate observation period at last_enc2;
data crc_pat;
set crc_pat;
format observation_end date9.;
where sas_death_date=. or sas_death_date>last_enc;
if last_enc2=. then observation_end=last_enc;
if exclude_date^=. or death_date^=. then do;
	if last_enc2<last_enc then
		observation_end=last_enc2;
	if last_enc2>=last_enc then
		observation_end=last_enc;
end;
observation_length=yrdif(index_visit, observation_end,'age');
run;

data crc_pat2;
set crc_pat;
if observation_length<1 and exclude_date^=1 then delete;
run;


/*****************************************************************************
B. Define SDH Screening status for food, housing and transportation insecurity
	No SDH Screening
	Documented Need
	Documented No Need
*******************************************************************************/

/*link patients to screens at their primary_clinic*/
proc sql;
create table pat_screens as
select 
p.raw_patid as raw_patid2,
p.delivery_site_id,
p.index_visit,
p.observation_end,
sdh.*,
domain='Housing Instability' as housing_screen,
domain='Food Insecurity' as food_screen,
domain='Transportation' as Transportation_screen,
sdh_positive_screen='1' as positive_screen
from crc_pat2 p
left join scrap.scrap_sdh_screening_3_domains sdh
on p.raw_patid=sdh.raw_patid and p.delivery_site_id=sdh.delivery_site_id
and sdh.screen_date between p.index_visit and p.observation_end
order by p.raw_patid, domain, screen_date;
quit;


*summarize observation period screens;
proc sql;
create table screen_summary as
select raw_patid2 as raw_patid,
delivery_site_id,
max(Food_screen) as screened_food,
max(food_screen*positive_screen) as pos_food,
max(Housing_screen) as screened_housing,
max(Housing_screen*positive_screen) as pos_housing,
max(Transportation_screen) as screened_transportation,
max(Transportation_screen*positive_screen) as pos_transportation
from pat_screens group by raw_patid2, delivery_site_id;

/*time to first screen*/
data first_screen (keep=raw_patid2 first_food first_housing first_transportation);
set pat_screens;
by raw_patid2 domain;
retain first_food first_housing first_transportation;
format first_food first_housing first_transportation date9.;
if first.raw_patid2 then do;
	first_food=.;
	first_housing=.;
	first_transportation=.;
end;
if first.raw_patid2 or first.domain then do;
	if domain='Food Insecurity' then first_food=screen_date;
	if domain='Housing Instability' then first_housing=screen_date;
	if domain='Transportation' then first_transportation=screen_date;
end;
if last.raw_patid2 then output;
run;

proc sql;
create table crc_pat3 as
select p.*,
s.screened_food,
f.first_food,
s.pos_food,
s.screened_housing,
f.first_housing,
s.pos_housing,
s.screened_transportation,
f.first_transportation,
s.pos_transportation
from crc_pat2 p left join screen_summary s
on p.raw_patid=s.raw_patid
left join first_screen f
on p.raw_patid=f.raw_patid2;
quit;

data crc_pat3;
set crc_pat3;
length SDH_food SDH_Housing SDH_Transportation $20;
time_to_food=first_food-index_visit;
time_to_housing=first_housing-index_visit;
time_to_transportation=first_transportation-index_visit;
if screened_food=0 then SDH_food='3_Never Screened';
else if screened_Food=1 then do;
	if pos_Food=0 then SDH_Food='2_Negative Screen';
	if pos_Food=1 then SDH_Food='1_Positive Screen';
end;
if screened_housing=0 then SDH_housing='3_Never Screened';
else if screened_housing=1 then do;
	if pos_housing=0 then SDH_housing='2_Negative Screen';
	if pos_housing=1 then SDH_housing='1_Positive Screen';
end;
if screened_transportation=0 then SDH_transportation='3_Never Screened';
else if screened_transportation=1 then do;
	if pos_transportation=0 then SDH_transportation='2_Negative Screen';
	if pos_transportation=1 then SDH_transportation='1_Positive Screen';
end;
run;


/*****************************************************************************
C. Pull demographics and encounter variables
	race_ethnicity: Hispanic, non-Hispanic Black, non-Hispanic white, Non-Hispanic other, no data
	Preferred language: English/non-English
	age at index
	primary payor at index
	FPL on or after Index
	PC visits per year in observation period

*******************************************************************************/

/*demographics*/
proc sql;
create table crc_pat4 as
select p.*,
d.hispanic_description,
d.race_description,
d.pat_pref_language_spoken_desc
from crc_pat3 p left join ochin.demographic d
on p.raw_patid=d.raw_patid;

data crc_pat4;
set crc_pat4;
if pat_pref_language_spoken_desc='English'
	then language_c='English';
else if pat_pref_language_spoken_desc='Spanish, Castilian'
	then language_c='Spanish';
else if pat_pref_language_spoken_desc^ in('English', 'Spanish, Castilian') 
	then language_c='Other';
if Hispanic_description='No' then do;
	if race_description='White' then race_ethnic='Nonhispanic White';
	if race_description='Black or African American' then race_ethnic='Nonhispanic Black';
	if race_description ^in('White','No information','Black or African American', 'Unknown','Refuse to answer')
		then race_ethnic='Nonhispanic Other';
end;
if Hispanic_description='Yes' then race_ethnic='Hispanic';
if race_ethnic=' ' then race_ethnic='Unknown';
run;

/*PC encounters*/
proc sql;
create table pc_enc as
select c.raw_patid,
count(distinct e.encounterid) as PC_encounters
from crc_pat4 c left join 
scrap.scrap_potential_cohort_pc_enc e
on c.raw_patid=e.raw_patid and
	e.contact_date between c.index_visit and c.observation_end
group by c.raw_patid;
create table crc_pat5 as
select c.*,
e.PC_encounters,
round(e.PC_encounters/observation_length,1) as yearly_visits
from crc_pat4 c left join pc_enc e
on c.raw_patid=e.raw_patid;

data crc_pat5;
set crc_pat5;
if yearly_visits<3 then visit_cat='1-2';
if yearly_visits>=3 and yearly_visits<5 then visit_cat='3-4';
if yearly_visits>=5 and yearly_visits<7 then visit_cat='5-6';
if yearly_visits>=7 then visit_cat='7+';
run;

/*index visit variables*/
proc sql;
create table encounters as
select p.raw_patid,
p.index_visit,
e.contact_date,
e.age_at_encounter_date,
e.FPL_percentage,
e.payor_type_research
from crc_pat5 p left join scrap.scrap_potential_cohort_pc_enc e
on p.raw_patid=e.raw_patid and
e.contact_date>=p.index_visit
order by p.raw_patid, e.contact_date;

data index;
set encounters;
by raw_patid;
if first.raw_patid then output;
run;

data first_fpl;
set encounters;
where fpl_percentage^=.;
by raw_patid;
if first.raw_patid then output;
run;

proc sql;
create table crc_pat6 as
select p.*,
i.age_at_encounter_date,
i.payor_type_research,
f.fpl_percentage
from crc_pat5 p left join index i
on p.raw_patid=i.raw_patid
left join first_fpl f on 
p.raw_patid=f.raw_patid;

data scrap.CRC_patient_covariates;
set crc_pat6;
if fpl_percentage=. then FPL_cat='No data';
if fpl_percentage^=. and FPL_percentage<=100 then FPL_cat='<=100';
if fpl_percentage>100 and FPL_percentage<=200 then FPL_cat='<=200';
if fpl_percentage>200 then FPL_cat='>200';
if payor_type_research=' ' then payor_type_research='Uninsured';
if age_at_encounter_date<40 then age_cat='23-39';
if age_at_encounter_date>=40 and age_at_encounter_date<50 then age_cat='40-49';
if age_at_encounter_date>=50 and age_at_encounter_date<65 then age_cat='50-64';
if age_at_encounter_date>=65 then age_cat='65-70';
run;

data SCRAP.CRC_patient_covariates;
set SCRAP.CRC_patient_covariates;
english=pat_pref_language_spoken_desc='English';
rename age_at_encounter_date=age_at_index_visit;
run; 

data SCRAP.CRC_patient_covariates;
set SCRAP.CRC_patient_covariates;
length obs_cat $10;
if years_observation<2 then obs_cat='<2';
if years_observation>=2 then obs_cat='2-3.7';
if age_cat='65-70' then age_cat='65-73';
run; 


